\documentclass{beamer}
\mode<presentation>
\usepackage{beamerthemesplit}
\usepackage{graphicx}
\title{Grid Racing}
\author{Tobias Poll, Thomas Markus, Achim Meyer}
\date{\today}

\begin{document}

\frame{
	\titlepage
}

\frame{
	\frametitle{Grid Racer}
	A Sarsa agent driving a car on a course from start to finish in as few steps as possible, implemented in a grid world.
	}


\frame{
	\frametitle{Overview}
	\begin{itemize}
	 	\item Requirements
		\item Architecture
		\item Reinforcement learning
		\item Results
		\item Conclusion
	\end{itemize}
}

\section{Requirements}
\frame{

	\frametitle{Requirements}
	\begin{itemize}
	 \item Learn a general policy for any track
	 \item Avoid obstacles
	 \item Find the fastest route towards the finish
	\end{itemize}
}


\section{Architecture}
\subsection{Track}
\frame{
	\frametitle{Track}

		\begin{itemize}
		\item Track consists of cells
		\item deceleration value per cell
		\begin{itemize}
		\item (e.g., mud 0.2, a wall 1.0)
		\end{itemize}
		\item Checkpoints
		\item keeps track of newly visited cells
        \item New tracks can be specified using ASCII-art

		\end{itemize}
}

\subsection{Car}
\frame{
	\frametitle{Car}
	\begin{itemize}
 		\item Car has a speed and direction
 		\item Car must be turned 30 degrees each timestep
	\end{itemize}
}

\subsection{Agent}
\frame{
 	\frametitle{Agent}

	\begin{itemize}
	 \item Agent drives a car
	\item Agent sees only what is in front of it
	\item 4 possible actions for each timestep
	\begin{itemize}
		\item turn left
		\item turn right
		\item brake
		\item accelerate
	\end{itemize}
	\end{itemize}
}

\frame{
\frametitle{Agent (2)}
	\begin{itemize}
	 \item View is subarray of the surface ahead
	 \item View rotates with the agent (reduced information)
	 \item Agent further receives: 
	\begin{itemize}
	     \item speed
		 \item direction
		\item x and y location on the track
    \end{itemize}
	\end{itemize}
}

\frame{

	\frametitle{GUI}
	\includegraphics[width=300px,height=60px]{aroundthewall.png} 

	
}


\section{Reinforcement learning}
\subsection{Agent overview}

\frame{
\frametitle{Agent Overview}
	\begin{itemize}
	    \item Sarsa
		\item separate neural net for each action
	    \item 
	\end{itemize}
}


\subsection{Neural net}
\frame{
	\frametitle{Neural net}

	\begin{itemize}
	 \item input: 4 (direction, speed, x and y) + $\mathit{viewwidth}^2$
	 \item hidden: 30
	 \item output: 1 (the Q-value for that state--action pair)
	 \item Separate action networks
	\end{itemize}
}

\subsection{Rewards}

\frame{
\frametitle{Rewards}
	\begin{itemize}
	 \item -1 for each timestep
	 \item 0 for passing a checkpoint
	\item 10 for passing the finish
	\item 0.3 for each newly visited cell
	\end{itemize}
}

\frame{
\frametitle{Rewards (2)}
	\begin{itemize}
		\item Reward per visited cell should make the agent curious about new terrain
		\item Checkpoints give the agent intermediate results aiding convergence
	\end{itemize}
}

\subsection{Exploration}
\frame{
	\frametitle{Exploration}
	\begin{itemize}
		\item Starting at 1.0, but decreasing over time
		\item Exploration overcomes initial bias, which could otherwise lead to extremely long episodes
	\end{itemize}
}


\section{Results}
\subsection{Different tracks, learning speeds}

\frame{

	\frametitle{Learning rates}
	\includegraphics[width=300px,height=200px]{result_learningrates.png}

}



\frame{

	\frametitle{Around the Wall}
	\includegraphics[width=300px,height=200px]{result_aroundthewall.png}

}

\frame{

	\frametitle{Around the Mud}
	\includegraphics[width=300px,height=200px]{result_aroundthemud.png}

}

\frame{

	\frametitle{Go Right then Down}
	\includegraphics[width=300px,height=200px]{result_gorightthendown.png}

}

\frame{

	\frametitle{Circular Simple}
	\includegraphics[width=300px,height=200px]{result_circularsimple.png}
}




\frame{

	\frametitle{Circular Complex}
	\includegraphics[width=300px,height=200px]{result_circularcomplex.png}

}




\section{Conclusions}
\subsection{Conclusions}
\frame{

	\frametitle{Conclusions}
    \begin{itemize}
     \item Agent learns to turn around corners and avoid obstacles
	\item Driving a lap around the track is much harder to learn
    \end{itemize}

}


\frame{
	\begin{center}
	{\Large Questions?}
	\end{center}
		
}


\end{document}
